In [1]:
# For each region, store in one data set the shares of burden and of trials across the 28 diseases

# 1- Build the burden matrix by category
#################################################################################
library(foreach)
library(doParallel)

# GBD 2005
# Data as downloaded from the GBD 2010 study, not included in the repository.
# The directory is declared once so the listing and the reads cannot drift apart.
gbd_dir <- '/media/igna/Elements/HotelDieu/Cochrane/GBD 2010/GBD_2005_2010_by_cause_country_level_GBD2010/2005'

# Keep only the entries whose name contains "csv" (case-insensitive)
gbds <- list.files(gbd_dir)
gbds <- gbds[grep('csv', gbds, ignore.case = TRUE)]
# Fail early (before starting workers) when the data directory is missing or empty
if (length(gbds) == 0) {
  stop("no CSV file found in ", gbd_dir, call. = FALSE)
}
gbd_paths <- file.path(gbd_dir, gbds)

cl <- makeCluster(4)
registerDoParallel(cl)

t0 <- proc.time()
# One data frame per CSV file, read on the workers.
# `finally` shuts the cluster down even when one of the reads fails.
A <- tryCatch(
  foreach(k = gbd_paths) %dopar% {
    Mcause <- read.csv(k)
    # Restrict to: all ages, both sexes; every metric is kept.
    # Columns are picked by position (1-8, 11, 12, 18).
    # NOTE(review): assumes every CSV shares the same column layout - confirm.
    Mcause[Mcause$age_name == 'All ages' & Mcause$sex == 'Both sexes', c(1:8, 11, 12, 18)]
  },
  finally = stopCluster(cl)
)

# Stack the per-file results into a single table
DT <- do.call('rbind', A)

# Elapsed time, in minutes
(proc.time() - t0) / 60
#1.5min


Loading required package: iterators
Loading required package: parallel
        user       system      elapsed 
0.0538166667 0.0004833333 3.0323000000 

In [15]:
# Sanity check on the level-4 cause: count missing (NA) values, then blank ones
with(DT, table(is.na(causelevel4)))
with(DT, table(causelevel4 == ""))


 FALSE 
177408 
 FALSE 
177408 

Burden is evaluated at least at level 4


In [16]:
# Distinct (level-3, level-4) cause pairs present in the burden data
dis4 <- with(DT, unique(paste(causelevel3, causelevel4)))
# Diseases evaluated at level 4: pairs having at least one row whose level-5
# cause is missing (NA) or blank
isn <- is.element(
  dis4,
  with(DT, paste(causelevel3, causelevel4)[is.na(causelevel5) | causelevel5 == ""])
)
table(isn)

All diseases are evaluated at level 4, and the 27-class grouping does not need level 5, so we drop the level-5 rows.


In [20]:
# Normalise a missing level-5 cause to "" and keep only the rows without a
# level-5 cause, i.e. discard every level-5 breakdown
DT$causelevel5 <- replace(DT$causelevel5, is.na(DT$causelevel5), "")
DT <- DT[DT$causelevel5 == "", ]

In [21]:
# Taxonomy with 27 categories (group names are read into column `x`)
Mgbd <- read.table(file = "../Data/27_gbd_groups.txt")

In [25]:
# After dropping the level-5 rows, the level-4 cause should still never be missing
with(DT, table(is.na(causelevel4)))


 FALSE 
106756 

In [26]:
# Keep the burden rows whose level-4 cause is itself one of the 27 groups
dt <- DT[is.element(DT$causelevel4, as.character(Mgbd$x)), ]

# Catalogue of the causes added so far: one row per distinct level 1-4 path
ML <- dt[!duplicated(with(dt, paste(causelevel1, causelevel2, causelevel3, causelevel4))), ]

# How many of the 27 groups are already covered by a level-4 cause
# (recorded run: 14 FALSE, 13 TRUE)
table(is.element(Mgbd$x, as.character(dt$causelevel4)))


FALSE  TRUE 
   14    13 

In [28]:
# Groups with no matching level-4 cause: these have to be assembled manually
Mgbd$x[!is.element(Mgbd$x, as.character(dt$causelevel4))]


  1. Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases
  2. Neglected tropical diseases excluding malaria
  3. Maternal disorders
  4. Neonatal disorders
  5. Nutritional deficiencies
  6. Neoplasms
  7. Cardiovascular and circulatory diseases
  8. Chronic respiratory diseases
  9. Cirrhosis of the liver
  10. Digestive diseases (except cirrhosis)
  11. Neurological disorders
  12. Mental and behavioral disorders
  13. Diabetes, urinary diseases and male infertility
  14. Musculoskeletal disorders

In [29]:
# Groups of the 27-class taxonomy that are not a single level-4 cause.
# Each one is obtained by summing the burden of several GBD causes per country
# and per measure. The same steps are applied to every group, so they live in
# one helper driven by a table of row selections.

# Collapse `rows` to one record per (country_name, measure).
#   rows  : subset of DT to aggregate.
#   label : value written to `causelevel4` of the aggregated rows.
#   what  : group description, only used in the error message.
# Returns the first row of each (country_name, measure) pair, ordered by that
# pair, with `nm_mean` and `rt_mean` replaced by the sums over the pair; the
# other columns keep the values of that first row.
sum_burden_by_country <- function(rows, label, what = label) {
  if (nrow(rows) == 0) {
    stop("no GBD rows selected for group: ", what, call. = FALSE)
  }
  key <- paste(rows$country_name, rows$measure)
  totals <- rows[!duplicated(key), ]
  totals <- totals[order(paste(totals$country_name, totals$measure)), ]
  totals_key <- paste(totals$country_name, totals$measure)
  # Sums are looked up by key name, so the result does not depend on tapply()
  # and order() sorting the keys the same way.
  totals$nm_mean <- as.vector(tapply(rows$nm_mean, key, sum)[totals_key])
  totals$rt_mean <- as.vector(tapply(rows$rt_mean, key, sum)[totals_key])
  totals$causelevel4 <- label
  totals
}

# Groups that coincide with a whole level-3 cause; causelevel4 is blanked so
# that the level-3 name is the deepest non-blank level of the aggregated rows.
level3_groups <- c(
  "Maternal disorders",
  "Neonatal disorders",
  "Nutritional deficiencies",
  "Neoplasms",
  "Cardiovascular and circulatory diseases",
  "Chronic respiratory diseases",
  "Cirrhosis of the liver",
  "Digestive diseases (except cirrhosis)",
  "Neurological disorders",
  "Mental and behavioral disorders",
  "Musculoskeletal disorders",
  "Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases"
)

group_specs <- c(
  lapply(level3_groups, function(g) {
    list(name = g, keep = DT$causelevel3 %in% g, label = "")
  }),
  list(
    # Neglected tropical diseases excluding malaria
    list(
      name = "Neglected tropical diseases excluding malaria",
      keep = DT$causelevel3 %in% c("Neglected tropical diseases and malaria") &
        !DT$causelevel4 %in% c("Malaria"),
      label = "Neglected tropical diseases excluding malaria"
    ),
    # Diabetes, urinary diseases and male infertility
    list(
      name = "Diabetes, urinary diseases and male infertility",
      keep = DT$causelevel4 %in% c("Diabetes mellitus", "Acute glomerulonephritis",
                                   "Urinary diseases and male infertility",
                                   "Chronic kidney diseases"),
      label = "Diabetes, urinary diseases and male infertility"
    )
  )
)

# Groups are processed in the listed order, which fixes the row order of dt and ML
for (spec in group_specs) {
  Aj <- DT[spec$keep, ]
  Mttp <- sum_burden_by_country(Aj, spec$label, what = spec$name)
  dt <- rbind(dt, Mttp)
  # Record every distinct level 1-4 cause path absorbed by the group
  ML <- rbind(ML, Aj[!duplicated(paste(Aj$causelevel1, Aj$causelevel2, Aj$causelevel3, Aj$causelevel4)), ])
}

In [46]:
# Are all groups found among the cause names taken at the level just above a
# blank level (level 2 when level 3 is blank, and so on)?
with(dt, table(Mgbd$x %in% c(
  as.character(causelevel2[causelevel3 == ""]),
  as.character(causelevel3[causelevel4 == ""]),
  as.character(causelevel4[causelevel5 == ""])
)))
# Same test on the complementary rows (next level not blank)
with(dt, table(Mgbd$x %in% c(
  as.character(causelevel2[causelevel3 != ""]),
  as.character(causelevel3[causelevel4 != ""]),
  as.character(causelevel4[causelevel5 != ""])
)))
# Number of distinct level 1-5 cause paths left in dt
U <- with(dt, unique(paste(causelevel1, causelevel2, causelevel3, causelevel4, causelevel5)))
length(U)


TRUE 
  27 
FALSE 
   27 
27

Ok, we have the 27 groups of diseases


In [50]:
# Level-4 diseases not yet absorbed by any group: count them, then list them
table(is.element(dis4, with(ML, paste(causelevel3, causelevel4))))
sort(dis4[!is.element(dis4, with(ML, paste(causelevel3, causelevel4)))])


FALSE  TRUE 
   16   139 
  1. 'Diabetes, urogenital, blood, and endocrine diseases Other endocrine, nutritional, blood, and immune disorders'
  2. 'Forces of nature, war, and legal intervention Collective violence and legal intervention'
  3. 'Forces of nature, war, and legal intervention Exposure to forces of nature'
  4. 'Other communicable, maternal, neonatal, and nutritional disorders Other infectious diseases'
  5. 'Self-harm and interpersonal violence Interpersonal violence'
  6. 'Self-harm and interpersonal violence Self-harm'
  7. 'Transport injuries Other transport injury'
  8. 'Transport injuries Road injury'
  9. 'Unintentional injuries other than transport injuries Adverse effects of medical treatment'
  10. 'Unintentional injuries other than transport injuries Animal contact'
  11. 'Unintentional injuries other than transport injuries Drowning'
  12. 'Unintentional injuries other than transport injuries Exposure to mechanical forces'
  13. 'Unintentional injuries other than transport injuries Falls'
  14. 'Unintentional injuries other than transport injuries Fire, heat and hot substances'
  15. 'Unintentional injuries other than transport injuries Poisonings'
  16. 'Unintentional injuries other than transport injuries Unintentional injuries not classified elsewhere'

Not included in the 27-class grouping:

  • Injuries
  • Other endocrine, nutritional, blood, and immune disorders
  • Other infectious diseases

The two residual categories in particular are excluded because they are not part of the classifier's output space: they were too complicated to handle.


In [53]:
# The excluded categories are appended to dt below, to measure how much burden
# is left out; rows already in dt are flagged as belonging to the 27 groups
dt[["cats27"]] <- TRUE

In [54]:
# Injuries (outside the 27 groups): burden summed per country and measure
Aj <- DT[is.element(DT$causelevel2, c("Injuries")), ]
# One template row per (country, measure), sorted by that pair
Mttp <- Aj[!duplicated(with(Aj, paste(country_name, measure))), ]
Mttp <- Mttp[with(Mttp, order(paste(country_name, measure))), ]
# Totals over all injury causes, for the two burden columns
Mttp$nm_mean <- with(Aj, tapply(nm_mean, paste(country_name, measure), sum))
Mttp$rt_mean <- with(Aj, tapply(rt_mean, paste(country_name, measure), sum))
# Blank levels 3 and 4 so that level 2 is the deepest non-blank cause level
Mttp[["causelevel4"]] <- ""
Mttp[["causelevel3"]] <- ""
Mttp[["cats27"]] <- FALSE
dt <- rbind(dt, Mttp)
# Record the distinct level 1-4 cause paths that were absorbed
ML <- rbind(ML, Aj[!duplicated(with(Aj, paste(causelevel1, causelevel2, causelevel3, causelevel4))), ])

In [55]:
# Residual categories (outside the 27 groups), summed per country and measure
Aj <- DT[is.element(DT$causelevel4, c("Other endocrine, nutritional, blood, and immune disorders","Other infectious diseases")), ]
# One template row per (country, measure), sorted by that pair
Mttp <- Aj[!duplicated(with(Aj, paste(country_name, measure))), ]
Mttp <- Mttp[with(Mttp, order(paste(country_name, measure))), ]
# Totals over the two residual causes, for the two burden columns
Mttp$nm_mean <- with(Aj, tapply(nm_mean, paste(country_name, measure), sum))
Mttp$rt_mean <- with(Aj, tapply(rt_mean, paste(country_name, measure), sum))
Mttp[["causelevel4"]] <- "Excluded residual categories"
Mttp[["cats27"]] <- FALSE
dt <- rbind(dt, Mttp)
# Record the distinct level 1-4 cause paths that were absorbed
ML <- rbind(ML, Aj[!duplicated(with(Aj, paste(causelevel1, causelevel2, causelevel3, causelevel4))), ])

In [56]:
# Every level-4 disease should now be recorded in ML
table(is.element(dis4, with(ML, paste(causelevel3, causelevel4))))


TRUE 
 155 

Ok, we included all the burden


In [57]:
# Disease = deepest non-blank cause level: start from level 4 and, while the
# value is still blank, fall back to level 3 and then to level 2
dt$Disease <- as.character(dt$causelevel4)
for (fallback in c("causelevel3", "causelevel2")) {
  unset <- dt$Disease == ""
  dt$Disease[unset] <- as.character(dt[[fallback]][unset])
}

In [58]:
# Rows whose Disease label is one of the 27 taxonomy groups (TRUE) versus the
# excluded categories (FALSE)
table(dt$Disease %in% as.character(Mgbd$x))
# Number of distinct disease labels. The column is spelled out in full:
# `dt$Dis` only resolved through partial matching of `$`.
length(unique(dt$Disease))


FALSE  TRUE 
 1496 19416 
29

Ok: 27 + injuries and residual


In [59]:
# Start the super-region column as a copy of the GBD region, then display the
# region levels that have to be recoded
dt[["Sup_region"]] <- dt[["region"]]
levels(dt[["region"]])


  1. 'Andean Latin America'
  2. 'Australasia'
  3. 'Caribbean'
  4. 'Central Asia'
  5. 'Central Europe '
  6. 'Central Latin America'
  7. 'Central Sub-Saharan Africa'
  8. 'East Asia'
  9. 'Eastern Europe'
  10. 'Eastern Sub-Saharan Africa'
  11. 'High-income Asia Pacific'
  12. 'High-income North America'
  13. 'North Africa and Middle East'
  14. 'Oceania'
  15. 'South Asia'
  16. 'Southeast Asia '
  17. 'Southern Latin America'
  18. 'Southern Sub-Saharan Africa'
  19. 'Tropical Latin America'
  20. 'Western Europe'
  21. 'Western Sub-Saharan Africa'

In [60]:
# Recode each GBD region into its super-region with an explicit lookup table.
# Assigning `levels<-` positionally depends on the alphabetical order of the
# factor levels, and recodes nothing when `region` is a character column
# (it would only attach a "levels" attribute).
super_region_of <- c(
  "Andean Latin America"         = "Latin America and Caribbean",
  "Australasia"                  = "High-income",
  "Caribbean"                    = "Latin America and Caribbean",
  "Central Asia"                 = "Central Europe, Eastern Europe, and Central Asia",
  "Central Europe"               = "Central Europe, Eastern Europe, and Central Asia",
  "Central Latin America"        = "Latin America and Caribbean",
  "Central Sub-Saharan Africa"   = "Sub-Saharian Africa",
  "East Asia"                    = "Southeast Asia, East Asia and Oceania",
  "Eastern Europe"               = "Central Europe, Eastern Europe, and Central Asia",
  "Eastern Sub-Saharan Africa"   = "Sub-Saharian Africa",
  "High-income Asia Pacific"     = "High-income",
  "High-income North America"    = "High-income",
  "North Africa and Middle East" = "North Africa and Middle East",
  "Oceania"                      = "Southeast Asia, East Asia and Oceania",
  "South Asia"                   = "South Asia",
  "Southeast Asia"               = "Southeast Asia, East Asia and Oceania",
  "Southern Latin America"       = "High-income",
  "Southern Sub-Saharan Africa"  = "Sub-Saharian Africa",
  "Tropical Latin America"       = "Latin America and Caribbean",
  "Western Europe"               = "High-income",
  "Western Sub-Saharan Africa"   = "Sub-Saharian Africa"
)

# Some region labels carry a trailing space in the data ('Central Europe ',
# 'Southeast Asia '), so surrounding whitespace is stripped before the lookup
region_names <- gsub("^\\s+|\\s+$", "", as.character(dt$region))
unmapped <- setdiff(region_names[!is.na(region_names)], names(super_region_of))
if (length(unmapped) > 0) {
  stop("regions without a super-region: ", paste(unmapped, collapse = "; "), call. = FALSE)
}

# Factor levels follow the order of first appearance in the lookup table,
# which is the order the positional `levels<-` assignment produced
dt$Sup_region <- factor(
  unname(super_region_of[region_names]),
  levels = unique(unname(super_region_of))
)

# Region level: total burden per (measure, super-region, disease)
GBD <- dt
# Distinct measures, diseases and super-regions, for reference only (not used
# below). Columns are spelled out in full instead of relying on partial
# matching of `$`: `GBD$mes` matched no column, and `GBD$Dis` is a character
# column, so both used to yield NULL.
meas <- levels(factor(GBD$measure))
dis <- levels(factor(GBD$Disease))
reg <- levels(factor(GBD$Sup_region))

# Sum the burden (nm_mean) within each measure & super-region & disease key
S <- tapply(GBD$nm_mean, paste(GBD$measure, GBD$Sup_region, GBD$Disease, sep = "&"), sum)

# Split the keys back into their three components
L <- strsplit(names(S), "&", fixed = TRUE)
# A key that does not split into exactly three parts (a label containing "&",
# or an empty disease label) would silently misalign the columns built below
if (any(vapply(L, length, integer(1)) != 3L)) {
  stop("measure/region/disease keys cannot be split back on '&'", call. = FALSE)
}

M <- data.frame(do.call('rbind', L))
names(M) <- c("metr", "Region", "Disease")
M$burden <- as.numeric(S)

In [61]:
# Save the burden per measure, region and disease (27 groups plus the
# excluded categories) as a text table
write.table(x = M, file = "../Data/DALY_YLL_deaths_per_region_and_27_and_excluded_diseases_2005.txt")

In [ ]: